library(tidyverse)
library(ggraph)
library(tidygraph)
library(igraph)
library(data.table)
library(tidytable)
library(statnet)
gkl_actors = fread('gkl_actors_full.csv')

actors = fread('../estc-data-unified/estc-actors-unified/actors.tsv')

Bipartite graph, projected to actor links:

g = gkl_actors %>% filter(J_divergence<.42) %>% 
  filter(!is.na(actor_id)) %>% 
  group_by(estc_id, actor_id) %>% 
  summarise(diverg = mean(J_divergence)) %>% 
  graph_from_data_frame( directed=FALSE)
## `summarise()` has grouped output by 'estc_id'. You can override using the `.groups` argument.
V(g)$type <- bipartite_mapping(g)$type 

#gg <- unipartite_projection_attr(gkl_actors_graph, "diverg", FALSE)

gg = bipartite.projection(g)

Basic node-level stats:

Weighted degree of all in graph with < .42 threshold:

gg[[2]] %>%
  as_tbl_graph() %>% 
  activate(edges)  %>% 
  activate(nodes) %>% 
  mutate(wtd_degree = centrality_degree(weights = weight)) %>% 
  as_tibble() %>% 
  arrange(desc(wtd_degree)) %>%
  rename(actor_id = name) %>% 
  left_join(actors %>% 
              select(actor_id, name_unified, viaf_link), by = 'actor_id') 

Degree distribution:

gg[[2]] %>%
  as_tbl_graph() %>% 
  activate(edges)  %>% 
  activate(nodes) %>% 
  mutate(wtd_degree = centrality_degree(weights = weight)) %>% 
  as_tibble() %>%
  count(wtd_degree) %>%
  ggplot() + geom_point(aes(wtd_degree,n)) + scale_x_log10()+ scale_y_log10()
## Warning: Transformation introduced infinite values in continuous x-axis

Not normally distributed but not scale-free (on a power law) either.

Betweenness centrality (not weighted):

(The sum of all the shortest paths between every pair of nodes which pass through that node)

gg[[2]] %>%
  as_tbl_graph() %>% 
  activate(edges)  %>% 
  activate(nodes) %>% 
  mutate(betweenness = centrality_betweenness()) %>% 
  as_tibble() %>% 
  arrange(desc(betweenness)) %>%
  rename(actor_id = name) %>% 
  left_join(actors %>% 
              select(actor_id, name_unified, viaf_link), by = 'actor_id') 

Degree and Betweenness

These two metrics are often related:

gg[[2]] %>%
  as_tbl_graph() %>% 
  activate(edges)  %>% 
  activate(nodes) %>% 
  mutate(betweenness = centrality_betweenness()) %>% 
  mutate(degree = centrality_degree(weights = weight)) %>% 
  as_tibble() %>% 
  mutate(betweenness_rank = rank(-betweenness))%>% 
  mutate(degree_rank = rank(-degree)) %>% 
  ggplot(aes(betweenness_rank, degree_rank)) + geom_rect(aes(xmin = 0, xmax = 100, ymin = 50, ymax = 500), fill ='red', alpha = .1) + geom_point()

Plotting both and looking for outliers shows nodes with ‘surprisingly’ high betweenness rank considering their degree (looking in the highlighted area for a start) can find interesting ‘bridges’, not important in their own right but holding separate parts of the network together:

p = gg[[2]] %>%
  as_tbl_graph() %>% 
  activate(edges)  %>% 
  activate(nodes) %>% 
  mutate(betweenness = centrality_betweenness()) %>% 
  mutate(degree = centrality_degree(weights = weight)) %>% 
  as_tibble()%>%
  rename(actor_id = name) %>% 
  left_join(actors %>% 
              select(actor_id, name_unified, viaf_link), by = 'actor_id')  %>% 
  mutate(betweenness_rank = rank(-betweenness))%>% 
  mutate(degree_rank = rank(-degree)) %>% 
  ggplot(aes(betweenness_rank, degree_rank, text = name_unified)) + 
  geom_point() 

plotly::ggplotly(p)

For instance David Niven (16th for betweenness, 425th for degree):

niven_books = gkl_actors %>% filter(actor_id == 'davidniven_0') %>% pull(estc_id)

gkl_actors %>% 
  filter(estc_id %in% niven_books & J_divergence <.42) 

Who did they work with?

g = gkl_actors %>% filter(J_divergence<.42) %>% 
  filter(!is.na(actor_id)) %>% 
  group_by(estc_id, actor_id) %>% 
  summarise(diverg = mean(J_divergence)) %>% 
  graph_from_data_frame( directed=FALSE)
## `summarise()` has grouped output by 'estc_id'. You can override using the `.groups` argument.
V(g)$type <- bipartite_mapping(g)$type 

gg = bipartite.projection(g)

actor_net = gg[[2]]

names(neighbors(actor_net, 'davidniven_0')) %>% as_tibble()%>% 
  left_join(actors %>% 
              select(actor_id, name_unified, viaf_link), by = c('value' = 'actor_id'))

H.D. Symonds (19th for betweenness, 169th for degree):

symonds_books = gkl_actors %>% filter(actor_id == '62707926') %>% pull(estc_id)

gkl_actors %>% 
  filter(estc_id %in% symonds_books & J_divergence <.42) %>% 
  filter(!is.na(actor_id)) %>% 
  pull(actor_id)
##  [1] "bbti_34706"       "bbti_58231"       "62707926"         "62707926"        
##  [5] "bbti_64114"       "messrichardson_0" "bbti_57604"       "62707926"        
##  [9] "24680588"         "bbti_22175"       "62707926"         "bbti_58231"      
## [13] "16320887"         "62707926"         "bbti_5526"        "bbti_77753"      
## [17] "39231857"         "62707926"         "bbti_109535"      "peterhill_0"     
## [21] "17068965"         "62707926"         "bbti_97091"       "bbti_72631"      
## [25] "westandhughes_1"  "24680588"         "messrichardson_0" "bbti_22173"      
## [29] "62707926"         "62707926"         "bbti_45088"       "62707926"        
## [33] "bbti_5155"        "62707926"         "bbti_97091"       "bbti_34268"      
## [37] "62707926"         "bbti_34706"       "bbti_58231"       "62707926"        
## [41] "62707926"         "128307380"        "bbti_97091"       "bbti_59971"      
## [45] "bbti_109535"      "peterhill_0"      "17068965"         "62707926"        
## [49] "bbti_97091"       "bbti_72631"       "westandhughes_1"  "bbti_59971"      
## [53] "bbti_77744"       "bbti_62208"       "71631229"         "bbti_34268"      
## [57] "62707926"
names(neighbors(actor_net, '62707926')) %>% 
  as_tibble()%>% 
  left_join(actors %>% 
              select(actor_id, name_unified, viaf_link), by = c('value' = 'actor_id'))

J. Duncan and Son (22nd for betweenness, 397 for degree):

duncan_books = gkl_actors %>% filter(actor_id == 'jduncanandson_0') %>% pull(estc_id)

gkl_actors %>% 
  filter(estc_id %in% duncan_books & J_divergence <.42) 
names(neighbors(actor_net, 'jduncanandson_0')) %>% as_tibble() %>% 
  left_join(actors %>% 
              select(actor_id, name_unified, viaf_link), by = c('value' = 'actor_id'))

Eigenvector centrality:

(Scores a node’s centrality based on its connections to other important nodes). Might suggest book trade actors who were influential because of their connections, or because they ‘had the ear’ of important individuals.

actor_net %>%
  as_tbl_graph() %>% 
  mutate(eigen = centrality_eigen(weights = weight)) %>% 
  as_tibble() %>% arrange(desc(eigen))%>% 
  left_join(actors %>% 
              select(actor_id, name_unified, viaf_link), by = c('name' = 'actor_id'))

Some changes from highest degree e.g William Strahan is 16th highest by degree but 6th in eigenvector centrality.

Edge-level metrics:

Highest-weighted edges:

actor_net %>% as_tbl_graph() %>% activate(edges)%>% 
  mutate(to_name = .N()$name[to], 
         from_name = .N()$name[from]) %>% 
  as_tibble() %>% 
  select(from = from_name, to = to_name, weight) %>% 
  arrange(desc(weight)) %>% 
  left_join(actors %>% select(name_unified, actor_id), by = c('from' = 'actor_id'))%>% 
  left_join(actors%>% select(name_unified, actor_id), by = c('to' = 'actor_id'))

We could look at overlapping works for these pairs e.g:

c_works = gkl_actors %>% filter(J_divergence <.42& actor_id == '18758830') %>% pull(estc_id)

s_works = gkl_actors %>% filter(J_divergence <.42& actor_id == '39467138')%>% pull(estc_id)

intersect(c_works, s_works) %>% as_tibble() %>% 
  left_join(gkl_actors, by = c('value' = 'estc_id')) %>% filter(actor_id %in% c('18758830', '39467138'))

Time series approach:

Calculate degree scores for networks consisting of one year of data:

get_yearly_stats = function(df){
  
 g =  df %>% filter(J_divergence<.42) %>% 
  filter(!is.na(actor_id)) %>% 
  group_by(estc_id, actor_id) %>% 
  summarise(diverg = mean(J_divergence)) %>% 
  graph_from_data_frame( directed=FALSE)

V(g)$type <- bipartite_mapping(g)$type 

gg = bipartite.projection(g)

actor_net = gg[[2]]

stats = actor_net %>% as_tbl_graph() %>% 
  mutate(degree = centrality_degree(weights = weight)) %>% 
  mutate(between = centrality_betweenness()) %>% 
  mutate(louvain = group_louvain(weights = weight)) %>% 
  as_tibble()

stats

  
}

list_of_dfs = list()

for(i in 1700:1800){
  
  list_of_dfs[[as.character(i)]] = gkl_actors %>% 
    filter(publication_year == i)
}

results = map(list_of_dfs, 
      possibly(get_yearly_stats, otherwise = NA_character_) )

na.omit.list <- function(y) { return(y[!sapply(y, function(x) all(is.na(x)))]) }

results = rbindlist(results %>% na.omit.list, idcol = 'year')

Sum of degree scores by year:

results %>% mutate(year = as.numeric(year)) %>%
  count(year, wt = degree) %>% 
  ggplot() + geom_col(aes(year, n))

Communities:

Interactive exploratory map of communities found for all books with <.42 divergence: Labels are sized by degree score. Filtered to edges with a weight for more than 1.

g = actor_net %>% as_tbl_graph() %>% 
  mutate(louvain = group_louvain(weights = weight)) %>% 
  mutate(color = louvain)

filtered_g = g %>% 
  activate(edges) %>% filter(weight>1) %>% 
  activate(nodes) %>% 
  mutate(degree = centrality_degree(mode = 'all', weights=  weight)) %>% filter(degree>0) %>% left_join(actors, by = c('name' = 'actor_id')) %>% 
  mutate(actor_id = name) %>%
  mutate(name = paste0(name_unified, " (", name, ")")) %>% mutate(size =5 ) %>% 
  mutate(font.size =sqrt(degree))

visNetwork::visIgraph(filtered_g, layout = 'layout_with_kk', physics = T)%>% 
  visNetwork::visEdges(width = .01, color = list(opacity = .3))%>%
 visNetwork::visOptions(selectedBy = "louvain")

Looks like two ‘core’ communities, community 1 with Lowndes, Fauldner etc. I think some of these are generational communities - because there are father/son pairs in different communities.

To check:

comms = g %>% filter(louvain %in% 1:10) %>%as_tibble()

gkl_actors %>% left_join(comms, by = c('actor_id' = 'name')) %>% 
  count(louvain, publication_year) %>% filter(!is.na(louvain)) %>%  
  ggplot() + geom_col(aes(x = publication_year, y =n, fill = as.factor(louvain)))

Might be more meaningful to

Not all are just temporal. For example community 3 almost complete cut off, except for connection through Luke White to John Debrett. These are Dublin BT actors:

g %>% 
  filter(louvain ==3) %>% 
  pull(name) %>% as_tibble() %>% 
  inner_join(gkl_actors, by = c('value' = 'actor_id')) %>% count(publication_place)

What is the divergence profile of books worked on by actors in these communities?

louvain_df = g %>% as_tibble() %>% select(name, louvain)

gkl_actors  %>% 
  left_join(louvain_df, by = c('actor_id'= 'name')) %>%
  filter(louvain %in% 1:15) %>% 
  ggplot() + 
  geom_density(aes(J_divergence)) + 
  facet_wrap(~louvain, ncol = 3)

What authors did they publish on?

gkl_actors  %>% filter(J_divergence<.42) %>% 
  left_join(louvain_df, by = c('actor_id'= 'name')) %>%
  filter(louvain %in% 1:10) %>% 
  count(louvain, author) %>% 
  arrange(desc(n)) %>% 
  filter(!is.na(author)) %>% 
  group_by(louvain) %>% top_n(10, wt = n) %>%
  summarise(authors = paste0(author, " (", n, ")", collapse = "; "))